{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from transformers import BertTokenizer, BertModel\n",
    "import bert_score\n",
    "from nltk.translate.bleu_score import sentence_bleu\n",
    "import time\n",
    "from tqdm import tqdm\n",
    "\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llamaapi import LlamaAPI\n",
    "import json\n",
    "\n",
    "# Replace 'Your_API_Token' with your actual API token\n",
    "llama = LlamaAPI('YOUR API TOKEN')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "metadata": {},
   "outputs": [],
   "source": [
    "def query_llama(llama_reasoning, final_sample, context_column, output_path):\n",
    "    prompted_text_list = []\n",
    "    for i, row in final_sample.iterrows():\n",
    "        prompted_text = base_text + row[context_column]\n",
    "        prompted_text_list.append(prompted_text)\n",
    "\n",
    "    def query(prompt):\n",
    "    # API Request JSON Cell\n",
    "        api_request_json = {\n",
    "          \"model\": \"llama3-70b\",\n",
    "          \"messages\": [\n",
    "            {\"role\": \"system\", \"content\": \"Act as a nutritionist. Ananlyze if a given food is healthy to a user and why.\"},\n",
    "            {\"role\": \"user\", \"content\": prompt},\n",
    "          ]\n",
    "        }\n",
    "        # Make your request and handle the response\n",
    "        response = llama.run(api_request_json)\n",
    "        return response.json()['choices'][0]['message']['content']\n",
    "\n",
    "    \n",
    "    for prompt in tqdm(prompted_text_list):\n",
    "        try:\n",
    "            reasoning = query(prompt)\n",
    "        except:\n",
    "            reasoning = 'API Error'\n",
    "            print('API Error')\n",
    "        time.sleep(10)\n",
    "        llama_reasoning.append(reasoning)\n",
    "\n",
    "    # Create a DataFrame\n",
    "    gpt_results_df = pd.DataFrame({'GPT_results': llama_reasoning})\n",
    "\n",
    "    # Save to CSV\n",
    "    gpt_results_csv_path = '../processed_data/reasoning/' + output_path\n",
    "    gpt_results_df.to_csv(gpt_results_csv_path, index=False)\n",
    "\n",
    "    return llama_reasoning\n",
    "\n",
    "\n",
    "def evaluate(reasoning, df):\n",
    "    # Sample ground truth and GPT-generated results\n",
    "    ground_truths = df['ground_truth'].tolist()  # List of ground truth texts\n",
    "    gpt_results = reasoning\n",
    "\n",
    "    # Calculate BERT scores\n",
    "    P, R, F1 = bert_score.score(gpt_results, ground_truths, lang=\"en\", model_type=\"bert-base-uncased\")\n",
    "    bert_scores = F1.numpy()\n",
    "    bert_score_mean = np.mean(bert_scores)\n",
    "    bert_score_std = np.std(bert_scores)\n",
    "\n",
    "    # Calculate BLEU scores\n",
    "    gpt_scores = [sentence_bleu([gt.split()], gpt.split()) for gt, gpt in zip(ground_truths, gpt_results)]\n",
    "    bleu_score_mean = np.mean(gpt_scores)\n",
    "    bleu_score_std = np.std(gpt_scores)\n",
    "\n",
    "    # Create DataFrame for results\n",
    "    result_df = pd.DataFrame({\n",
    "        'GPT_results': gpt_results,\n",
    "        'BERT_score': bert_scores,\n",
    "        # 'BLEU_score': gpt_scores\n",
    "    })\n",
    "\n",
    "    # Summary\n",
    "    summary = {\n",
    "        'BERT_score_mean': bert_score_mean,\n",
    "        'BERT_score_std': bert_score_std,\n",
    "        'BLEU_score_mean': bleu_score_mean,\n",
    "        'BLEU_score_std': bleu_score_std\n",
    "    }\n",
    "    # Output results\n",
    "    print(summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "metadata": {},
   "outputs": [],
   "source": [
    "base_text = \"\"\"\n",
    "    Act as a nutritionist, your task is to use your knowledge to judge if the given food should be considered a healthy option to the user given their profiles. \n",
    "    Important Note: You must STRICTLY provide your answer in the following format WITHOUT any further explanations or other words:  <Yes or No>, because the food is: <high or low> in <nutrients>, <choose between high or low> in <nutrients>,…. (E.g. Yes, becuase the food is low in calories, low in sodium and high in protein).\n",
    "    Here is the input: \n",
    "\"\"\"\n",
    "final_sample = pd.read_csv('../processed_data/benchmark_reasoning.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 200/200 [41:16<00:00, 12.38s/it]\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.69880235, 'BERT_score_std': 0.10955263, 'BLEU_score_mean': 0.2621309652455708, 'BLEU_score_std': 0.09954877984334148}\n"
     ]
    }
   ],
   "source": [
    "# Raw \n",
    "llama_reasoning = []\n",
    "raw_reasoning = query_llama(llama_reasoning, final_sample, context_column='user_food_combined_prompts', output_path='raw_llama3.csv')\n",
    "evaluate(raw_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 200/200 [40:25<00:00, 12.13s/it]\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.70065427, 'BERT_score_std': 0.10495191, 'BLEU_score_mean': 0.2069737978992119, 'BLEU_score_std': 0.12186578613605656}\n"
     ]
    }
   ],
   "source": [
    "# Xrec\n",
    "xrec_reasoning = []\n",
    "xrec_reasoning = query_llama(xrec_reasoning, final_sample, context_column='food_considered_healthy', output_path='XRec_llama3.csv')\n",
    "evaluate(xrec_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 200/200 [40:24<00:00, 12.12s/it]\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.7427185, 'BERT_score_std': 0.03158122, 'BLEU_score_mean': 0.21532947428836777, 'BLEU_score_std': 0.09428363458290431}\n"
     ]
    }
   ],
   "source": [
    "# LLM2ER\n",
    "llm2er_reasoning = []\n",
    "llm2er_reasoning = query_llama(llm2er_reasoning, final_sample, context_column='user_food_liked', output_path='LLM2ER_llama3.csv')\n",
    "evaluate(llm2er_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 200/200 [40:34<00:00, 12.17s/it]\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.84127486, 'BERT_score_std': 0.07312379, 'BLEU_score_mean': 0.3698287272917877, 'BLEU_score_std': 0.19553086488228746}\n"
     ]
    }
   ],
   "source": [
    "our_reasoning = []\n",
    "our_reasoning = query_llama(our_reasoning, final_sample, context_column='new_prompt', output_path='Ours_llama3.csv')\n",
    "evaluate(our_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "metadata": {},
   "outputs": [],
   "source": [
    "raw_reasoning = pd.read_csv('../processed_data/reasoning/raw_llama2.csv')['GPT_results'].tolist()\n",
    "xrec_reasoning = pd.read_csv('../processed_data/reasoning/XRec_llama2.csv')['GPT_results'].tolist()\n",
    "llm2er_reasoning = pd.read_csv('../processed_data/reasoning/LLM2ER_llama2.csv')['GPT_results'].tolist()\n",
    "our_reasoning = pd.read_csv('../processed_data/reasoning/Ours_llama2.csv')['GPT_results'].tolist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.5581448, 'BERT_score_std': 0.064839534, 'BLEU_score_mean': 0.05527894270450269, 'BLEU_score_std': 0.07889836575127629}\n"
     ]
    }
   ],
   "source": [
    "evaluate(raw_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.53682, 'BERT_score_std': 0.048586834, 'BLEU_score_mean': 0.01013901214911789, 'BLEU_score_std': 0.02342373600577563}\n"
     ]
    }
   ],
   "source": [
    "evaluate(xrec_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.5560213, 'BERT_score_std': 0.04305631, 'BLEU_score_mean': 0.015683256726849937, 'BLEU_score_std': 0.028369411627361955}\n"
     ]
    }
   ],
   "source": [
    "evaluate(llm2er_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `beta` will be renamed internally to `bias`. Please use a different name to suppress this warning.\n",
      "A parameter name that contains `gamma` will be renamed internally to `weight`. Please use a different name to suppress this warning.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'BERT_score_mean': 0.56366456, 'BERT_score_std': 0.058120113, 'BLEU_score_mean': 0.014524161710214391, 'BLEU_score_std': 0.02932298963902566}\n"
     ]
    }
   ],
   "source": [
    "evaluate(our_reasoning, final_sample)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
